I'm going to overwrite a lot of this notebook's old content. I changed the way I'm calculating wt, and I want to test that my training worked.


In [1]:
from pearce.emulator import OriginalRecipe, ExtraCrispy, SpicyBuffalo
from pearce.mocks import cat_dict
import numpy as np
from os import path

In [2]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [3]:
training_file = '/scratch/users/swmclau2/xi_gm_cosmo/PearceRedMagicXiGMCosmoFixedNd.hdf5'
#training_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoFixedNdLowMsat.hdf5'
test_file = '/scratch/users/swmclau2/xi_gm_cosmo_test/PearceRedMagicXiGMCosmoFixedNdTest.hdf5'
#test_file = '/u/ki/swmclau2/des/PearceRedMagicXiCosmoFixedNdLowMsatTest.hdf5'

em_method = 'gp'
split_method = 'random'

In [4]:
a = 1.0
z = 1.0/a - 1.0

In [5]:
fixed_params = {'z':z}#, 'cosmo': 0}#, 'r':24.06822623}
# Hand-tuned hyperparameter vectors from earlier runs; both are superseded
# by the optimizer result loaded in the next cell.
v = np.array([ 2.68607108, -4.29605403, -12. , 12., -11.18424935, -8.39772494 , -8.88556884, 9.00797499 , 12. , 10.56121261, 12., 5.50280428, 0.59482347 , -6.24363245 , -8.26644505, 0.25342135, 0.96977814 , 12., 3.37450278, 12. , 7.15832218, 0.79122102, 11.9455156 , 1.64464882, 4.03020203])
v = np.array([ 0.2661017, 0.1054246, 1.1295944, 0.3643993, 0.2408568, 11.5649985, 5.6407612, 4.9071932, 10.6279446, 11.7621938, 4.7031938, 6.3770235, 11.7578699, 11.7547548, 8.4866085, -12.0550382, 1.8339794, 10.6161248, 2.2441632, 13.8155106, 10.6371797, 11.3512804, 7.342365 , 3.1795786, 3.7658774, 5.0188608, 4.6846614, 13.8155106, 13.8155106, 5.545777 , 13.8155106, -1.5383083, -13.8155106])

In [6]:
v = np.loadtxt('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_result_xigm.npy')

In [7]:
param_names = ['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff', 'logM0', 'sigma_logM', 'logM1', 'alpha']

In [8]:
# Names for the flat hyperparameter vector: a bias term, then an amplitude
# plus one metric entry per parameter, for each of two kernels.
pnames = ['bias', 'amp']
pnames.extend(param_names)
pnames.append('amp')
pnames.extend(param_names)

In [9]:
from collections import defaultdict
metric = defaultdict(list)
for val, pname in zip(v, pnames):
    metric[pname].append(val)
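As a toy illustration of what this grouping produces (hypothetical values, not from the run): 'bias' collects one entry, while 'amp' and each cosmology/HOD parameter collect two, one per kernel.

from collections import defaultdict
toy = defaultdict(list)
for val, pname in zip([0.1, 0.2, 0.3], ['bias', 'amp', 'amp']):
    toy[pname].append(val)
print dict(toy)   # -> {'amp': [0.2, 0.3], 'bias': [0.1]}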
# guessing cosmo comes first?
zx_kernel_pnames = ['omch2', 'ombh2', 'ln10As', 'H0', 'ns', 'Neff', 'w0',
                    'logM1', 'alpha', 'logM0', 'sigma_logM',
                    None, None, None, None, 'amp']
zx_kernel_pnames.extend(zx_kernel_pnames)
zx_kernel_pnames.append('amp')

zx_metric = {}
for pname, val in zip(zx_kernel_pnames, v):
    if pname is None:
        continue
    if pname not in zx_metric:
        zx_metric[pname] = []
    zx_metric[pname].append(val)

m1 = [zx_metric[pname][0] for pname in param_names]
m2 = [zx_metric[pname][0] for pname in param_names]
a1, a2, a3 = zx_metric['amp']

from george.kernels import *
#kernel = ExpSquaredKernel(np.exp(m1), ndim=11) * ConstantKernel(a1, ndim=11)\
#    + Matern32Kernel(np.exp(m2), ndim=11) + ConstantKernel(a2, ndim=11)#+ WhiteKernel(a3, ndim=11)
kernel = Matern52Kernel(np.exp(0), ndim=11) + ConstantKernel(0, ndim = 11)

np.random.seed(0)
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                     custom_mean_function = 'linear', downsample_factor=0.02)
#                    hyperparams = {'n_estimators': 500,
#                                   'max_depth': 5})

In [10]:
np.random.seed(0)
emu = SpicyBuffalo(training_file, method = em_method,hyperparams = {'metric':metric}, fixed_params=fixed_params,
                 custom_mean_function = 'linear', downsample_factor = 0.1)
for pname, val in zip(emu._emulators[0].get_parameter_names(),
                      emu._emulators[0].get_parameter_vector()):
    print pname, val

for pname in emu.get_param_names():
    print pname, emu.get_param_bounds(pname)

n_leaves, n_overlap = 5000, 1
emu = ExtraCrispy(training_file, n_leaves, n_overlap, split_method, method = em_method,
                  fixed_params=fixed_params, custom_mean_function = 'linear',
                  downsample_factor = 1.0)

In [11]:
emu.scale_bin_centers


Out[11]:
array([  0.09581734,   0.13534558,   0.19118072,   0.27004994,
         0.38145568,   0.53882047,   0.76110414,   1.07508818,
         1.51860241,   2.14508292,   3.03001016,   4.28000311,
         6.04566509,   8.53972892,  12.06268772,  17.0389993 ,
        24.06822623,  33.99727318])

In [12]:
rbins = np.logspace(-1.1, 1.6, 19)
print (rbins[1:]+rbins[:-1])/2


[  0.09581733   0.13534558   0.19118072   0.27004994   0.38145568
   0.53882047   0.76110414   1.07508817   1.51860241   2.14508292
   3.03001016   4.28000311   6.04566509   8.53972892  12.06268772
  17.0389993   24.06822623  33.99727318]
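A quick sanity check (my addition, assuming both arrays carry the same units): the recomputed midpoints should match the emulator's stored bin centers.

# Hypothetical check: the emulator's bins are the arithmetic midpoints
# of these log-spaced edges, matching the printout above.
assert np.allclose(emu.scale_bin_centers, (rbins[1:] + rbins[:-1])/2)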
#v = [ 5.30324096, -2.92565173, -4.42429766, -2.48246708, 10.0699553,
#      -8.69248089, 0.15083459, -4.72773171, 1.49655743, 1.17087069,
#      0.97997249, -1.91976981, 7.17709699, -10.26944746, 10.37865121,
#      0.50210416, 12.01525603, 5.89975769, 4.78413158, -1.33582408,
#      -9.94571436, 6.50103759, 9.70125021, -11.7173251, -0.10738552]
#v = [-12., -5.56039378, 5.01999226, 9.63986449, -12.,
#     -12., -2.13397232, 7.90203306, -12., 0.51574787,
#     7.25074046, -1.28201782, 2.84486642, -12., -12.,
#     12., -7.75414731, 12., 12., -12.,
#     5.15929851, -12., 0.53369134, -3.81346879, -6.9684267, ]
#v = [ -1.14028239, -12., -8.08712614, -1.51590059, 2.66822232,
#      -12., -10.17631757, 6.17448276, 6.2380102, -12.,
#      -3.63076923, -1.3260091, 1.84463374, -2.57120539, 0.42652469,
#      0.94596673, 2.04145112, -5.13442469, 12., 8.86492205,
#      -12., 2.44659378, 12., 2.10889078, 9.36958256]
v = [ 12., 12., 12., 12., 12., 12.,
      11.67920429, 6.81656135, 12., 1.92715272, 12., 7.72884642,
      12., -12., 2.57697301, 12., 8.85016763, 9.96558899,
      6.24704116, 12., 12., -12., -12., -12., 12., ]
v = np.ones_like(v)*12.0
#[ -8.16992654, 8.67223223, -2.21244202, -8.9034059, 2.12923655,
#  -9.85755136, 8.6699058, 1.24032812, -0.41040494, 1.93942663,
#  3.70688402, 5.53623883, 10.27060377, -8.96397854, -1.54633767,
#  6.72846738, -9.38415988, -9.68900453, -3.5219898, -9.2396184,
#  10.60952486, -2.12807193, -9.92944798, -2.87456867, -8.28144359]
#[-2.30321746, -6., 2.74986581, -0.59132415, 0.74564814, -1.47243557,
# 5.38958859, 2.36982126, 5.35110678, 1.6798139, 6., 2.82272942,
# 6. ]
#[ 6. -6. -2.95890662 -6. -3.63039154 6.
#  -0.93930637 -6. -6. 2.2162459 6. 2.75175138
#  5.09211467]
if hasattr(emu, "_emulator"):
    emu._emulator.set_parameter_vector(v)
    emu._emulator.recompute()
else:
    for _emulator in emu._emulators:
        _emulator.set_parameter_vector(v)
        _emulator.recompute()

In [13]:
params = {}
for pname in emu.get_param_names():
    if pname == 'r':
        continue
    low, high = emu.get_param_bounds(pname)
    params[pname] = np.random.uniform(low, high)
    
print params


{'logM1': 14.267701848157271, 'Neff': 3.6095427706197905, 'logM0': 13.578807613765713, 'sigma_logM': 0.2502193887675708, 'H0': 73.31620420249872, 'w0': -0.5886073500474833, 'omch2': 0.10271736946651576, 'ln10As': 3.0459840518637358, 'alpha': 1.031390598923265, 'ns': 0.9832511796584299, 'ombh2': 0.021913242278466367}

In [14]:
pred_y = emu.emulate_wrt_r(params)[0]
print pred_y


[ 4.04004748  3.75891956  3.44550816  3.12585732  2.7892213   2.44664001
  2.07815025  1.68794447  1.28838445  0.93358763  0.6619232   0.43599471
  0.21960341 -0.01169283 -0.23920516 -0.49295352 -0.73753417 -1.0304472 ]

In [15]:
plt.plot(emu.scale_bin_centers, pred_y)
plt.xscale('log')


pred_y = emu.emulate(params)[0]
print pred_y

plt.plot(emu.scale_bin_centers, pred_y)
plt.xscale("log")

In [16]:
gof = emu.goodness_of_fit(test_file, statistic = 'frac')
#print gof.mean(), np.median(gof)
for g in gof:
    print g.mean(), np.median(g)


0.0234517600943 0.0121923981071
0.0192527749647 0.0109628666191
0.0188197683256 0.0125569191338
0.0174105517534 0.0115639485909
0.0180021921549 0.0138528889531
0.0170768295276 0.0115808824458
0.0170542635561 0.0126511857355
0.0164322821349 0.0117216605456
0.0162993074686 0.0112952571418
0.0184492766357 0.0138849150335
0.0207765933422 0.0165579597545
0.0172226956557 0.0125498110043
0.01102411654 0.0074401020925
0.0123994008471 0.009087794042
0.0137004409511 0.0117419100576
0.0113102910711 0.00830910180484
0.0119856512156 0.0067302607619
0.0224484907575 0.0184341607037
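For reference, my understanding of the 'frac' statistic is a per-bin fractional error; here is a minimal sketch of an equivalent hand computation. This is an assumption about pearce's internals, including whether the comparison happens in linear or log space.

# Hypothetical re-derivation of the per-bin numbers above.
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
for py, dy in zip(pred_y, data_y):
    frac = np.abs(10**py - 10**dy)/np.abs(10**dy)
    print frac.mean(), np.median(frac)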

In [17]:
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)

In [18]:
test_x, _, _, _ = emu.get_data(test_file, fixed_params=fixed_params)

train_x, _, _, _ = emu.get_data(training_file, fixed_params=fixed_params)

emu.get_param_names()


Out[18]:
['ombh2',
 'omch2',
 'w0',
 'ns',
 'ln10As',
 'H0',
 'Neff',
 'logM0',
 'sigma_logM',
 'logM1',
 'alpha']

In [19]:
print test_x.shape, train_x.shape
print test_x.shape[0]/(7*emu.n_bins), train_x.shape[0]/(40*emu.n_bins)


(126000, 12) (720000, 12)
1000 1000

In [20]:
idx1, idx2 = -5, -2
plt.scatter(test_x[0:1000*emu.n_bins:emu.n_bins, idx1], test_x[0:1000*emu.n_bins:emu.n_bins, idx2], alpha = 0.5)
plt.scatter(train_x[0:1000*emu.n_bins:emu.n_bins, idx1], train_x[0:1000*emu.n_bins:emu.n_bins, idx2], color = 'g', alpha = 0.5)


Out[20]:
<matplotlib.collections.PathCollection at 0x7f23c73c7d50>

In [21]:
resmat = np.zeros( (len(pred_y), pred_y[0].shape[0]))
for bin_no in xrange(len(pred_y)):
    py, dy = pred_y[bin_no], data_y[bin_no]
    resmat[bin_no] = 10**py - 10**dy
    
    #print np.sqrt(np.mean( (py - dy)**2)*py.shape[0]/(py.shape[0]-1) )

In [22]:
scovmat = resmat.dot(resmat.T)/(len(pred_y[0])-1)
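In equation form, this is the standard sample covariance of the linear-space residuals over the $N$ test points per bin (note the residuals aren't mean-subtracted, so any systematic emulator bias gets folded into the covariance):

$$\hat{C}_{ij} = \frac{1}{N-1}\sum_{k=1}^{N} r_{ik}\,r_{jk}, \qquad r_{ik} = 10^{y^{\mathrm{pred}}_{ik}} - 10^{y^{\mathrm{true}}_{ik}}$$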

In [23]:
print np.sqrt(np.diag(scovmat))


[  3.29453937e+02   1.64128669e+02   9.23737801e+01   5.12651187e+01
   2.82956875e+01   1.65647486e+01   9.05888752e+00   5.32473359e+00
   2.62057192e+00   1.25187794e+00   5.18920220e-01   1.83799943e-01
   5.62377534e-02   3.11488548e-02   1.88440419e-02   9.49574675e-03
   4.99840516e-03   4.12298800e-03]
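Not part of the original run, but a quick way to see how correlated neighboring r bins are is to normalize the sample covariance to a correlation matrix; a minimal sketch:

# Follow-up sketch: normalize to a correlation matrix and plot it.
sig = np.sqrt(np.diag(scovmat))
corrmat = scovmat / np.outer(sig, sig)
plt.imshow(corrmat, interpolation='none')
plt.colorbar()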
np.savetxt('/home/users/swmclau2/Git/pearce/bin/mcmc/xigm_scov.npy', scovmat)
emu._save_as_default_metric(metric)

In [52]:
%%bash
cp /home/users/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/default_metrics.pkl /home/users/swmclau2/Git/pearce/emulator/default_metrics.pkl


cp: cannot create regular file ‘/home/users/swmclau2/Git/pearce/emulator/default_metrics.pkl’: No such file or directory
metric['amp'] = [1, 1]
metric['bias'] = [0.0]

for downsample_factor in [ 0.01, 0.05, 0.2, 0.4, 0.6, 0.8, 1.0]:
    np.random.seed(0)
    emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                         custom_mean_function = 'linear', downsample_factor=downsample_factor)
    gof = emu.goodness_of_fit(test_file, statistic = 'log_frac')
    print downsample_factor, gof.mean(), np.median(gof)
emu._save_as_default_metric(metric)

In [53]:
#print g.mean(), np.median(g)
for g in gof:
    print g.mean(), np.median(g)

In [54]:
plt.hist(y)



NameErrorTraceback (most recent call last)
<ipython-input-54-3b2670a335aa> in <module>()
----> 1 plt.hist(y)

NameError: name 'y' is not defined

In [ ]:
plt.hist(pred_y)

In [ ]:
plt.hist(emu.y)
for idx in xrange(emu.n_bins):
    fig = plt.figure(figsize=(10, 4))
    rat = np.abs(pred_y[idx] - y[idx])/np.abs(y[idx])
    print rat.mean(), np.percentile(rat, [ 50, 68, 95])
    plt.subplot(121)
    plt.hist(np.log10(rat), bins = 50 );
    plt.subplot(122)
    plt.hist(pred_y[idx]-y[idx], bins = np.linspace(-1, 1, 51) );
    plt.show()

In [ ]:
plt.hist(np.log10(gof));

In [ ]:
plt.hist(emu.y)

In [ ]:
for i in xrange(50):    
    params = {}

    for pname in emu.get_param_names():
        if pname == 'r':
            continue
        low, high = emu.get_param_bounds(pname)
        params[pname] = np.random.uniform(low, high)
    pred_y = emu.emulate(params)[0]
    print pred_y
    #print params

In [ ]:
for i, (g, r) in enumerate(zip(gof, emu.scale_bin_centers)):
    print r, g.mean(), np.median(g)
    #plt.hist(np.log10(g))
    #plt.show()
plt.hist(np.log10(gof) );
from sklearn.model_selection import train_test_split
x, y, yerr = emu.x, emu.y, emu.yerr
downsample_idxs = np.random.choice(x.shape[0], size = int(0.08*x.shape[0]), replace = False)
x, y, yerr = x[downsample_idxs, :], y[downsample_idxs], yerr[downsample_idxs]
train_x, test_x, train_y, test_y, train_yerr, test_yerr = train_test_split(x, y, yerr, test_size = 0.1)

In [ ]:
n_cosmo_params = 7
loo_cosmo = emu.x[0, 0,  :n_cosmo_params]

# Leave-one-cosmology-out split: hold out every row matching the first cosmology.
loo_cosmo_idxs = np.all(emu.x[:, :,:n_cosmo_params] == loo_cosmo, axis =2)
train_x, train_y, train_yerr = emu.x[~loo_cosmo_idxs, :], emu.y[ ~loo_cosmo_idxs], emu.yerr[ ~loo_cosmo_idxs]
test_x, test_y, test_yerr = emu.x[loo_cosmo_idxs, :], emu.y[loo_cosmo_idxs], emu.yerr[loo_cosmo_idxs]
# Overrides the split above: train on the full set instead.
train_x, train_y, train_yerr = emu.x, emu.y, emu.yerr

In [ ]:
model = emu._emulator
model.compute(train_x, train_yerr)
test_x, test_y, test_yerr, _ = emu.get_data(test_file,fixed_params, None)

In [ ]:
pred_y = model.predict(train_y, test_x, False, False, False)*emu._y_std + emu._y_mean
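The trailing *emu._y_std + emu._y_mean undoes the whitening applied to the training targets; schematically, with $\mu_y$ and $\sigma_y$ the training mean and standard deviation (a later cell adds a small 1e-5 to the std for numerical safety):

$$\tilde{y} = \frac{y - \mu_y}{\sigma_y}, \qquad y^{\mathrm{pred}} = \tilde{y}^{\mathrm{pred}}\,\sigma_y + \mu_y$$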

In [ ]:
np.mean(np.abs((pred_y-test_y)/test_y))
#np.mean(np.abs((pred_y-train_y)/train_y))
for idx in xrange(50):
    plt.plot(emu.scale_bin_centers, ypred[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'Emu')
    plt.plot(emu.scale_bin_centers, emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'True')
    plt.title(np.sum(emu.x[(idx+1)*emu.n_bins, :-1]) )
    plt.legend(loc='best')
    plt.xscale('log')
    plt.show()

In [ ]:
# Leftover job-config snippet (not Python):
# queue_skipper: True
# system: sherlock
# n_jobs: 400
# max_time: 6
resids = np.abs(emu.y*emu._y_std+emu._y_mean - ypred)

In [ ]:
np.mean(resids/(emu.y*emu._y_std+emu._y_mean))

In [ ]:
ypred.mean(), emu._y_mean
plt.plot(emu.scale_bin_centers, np.abs(gof.mean(axis = 0)) )
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01)
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.05)
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.1)
plt.loglog();

plt.plot(emu.scale_bin_centers, np.abs(gof.T), alpha = 0.1, color = 'b')
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01, lw = 2, color = 'k')
plt.loglog();

In [ ]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'log_frac')
print test_gof.mean()

In [ ]:
test_gof = emu.goodness_of_fit(test_file, statistic = 'frac')
print test_gof.mean()

In [ ]:
plt.hist(np.log10(test_gof));

In [ ]:
test_x

In [ ]:
(emu.x*emu._x_std) + emu._x_mean

In [ ]:
emu.get_param_names()

In [ ]:
test_x_white, test_y_white = (test_x - emu._x_mean)/(emu._x_std + 1e-5), (test_y - emu._y_mean)/(emu._y_std + 1e-5)

In [ ]:
model = emu._emulator

In [ ]:
pred_y_white = model.predict(emu.y, test_x_white, False, False, False)

In [ ]:
pred_y = pred_y_white*emu._y_std + emu._y_mean

In [ ]:
plt.plot(pred_y[:100], label = 'pred')
plt.plot(test_y[:100], label = 'truth')

plt.legend(loc = 'best')

In [ ]:
test_y.mean(), emu._y_mean, pred_y.mean()

In [ ]:
test_y.std(), emu._y_std, pred_y.std()

In [ ]:
plt.hist(pred_y_white, bins = np.linspace(-3, 3, 100), label = 'Pred')
plt.hist(test_y_white, bins = np.linspace(-3, 3, 100), label = 'Test', alpha = 0.4);
plt.legend(loc = 'best')

In [ ]:


In [ ]: